{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.llms import Ollama\n",
    "\n",
    "llm = Ollama(model='llama3', verbose=True)\n",
    "\n",
    "print(llm)\n",
    "\n",
    "llm.invoke(\"hello\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating a retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "\n",
    "loader = WebBaseLoader(\"https://docs.smith.langchain.com/overview\")\n",
    "\n",
    "data = loader.load()\n",
    "\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据分割成小块\n",
    "\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
    "\n",
    "all_splits = text_splitter.split_documents(data)\n",
    "\n",
    "print(all_splits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(all_splits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将所有的块 存入向量数据库\n",
    "\n",
    "from langchain_chroma import Chroma\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "from langchain_community.embeddings import OllamaEmbeddings\n",
    "\n",
    "#  专门用于 openai 模型\n",
    "# vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())\n",
    "\n",
    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings(model='llama3'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 在向量数据库中 检索一个字符串\n",
    "\n",
    "retriever = vectorstore.as_retriever(k=3)\n",
    "\n",
    "docs = retriever.invoke(\"Can LangSmith help test my LLM applications?\")\n",
    "\n",
    "print(docs)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Document chains\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create_stuff_documents_chain  创建context\n",
    "\n",
    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
    "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
    "\n",
    "\n",
    "SYSTEM_TEMPLATE = \"\"\"\n",
    "Answer the user's questions based on the below context. \n",
    "If the context doesn't contain any relevant information to the question, don't make something up and just say \"I don't know\":\n",
    "\n",
    "<context>\n",
    "{context}\n",
    "</context>\n",
    "\"\"\"\n",
    "\n",
    "question_answering_prompt = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\"system\", SYSTEM_TEMPLATE),\n",
    "        MessagesPlaceholder(variable_name=\"messages\")\n",
    "    ]\n",
    ")\n",
    "\n",
    "document_chain = create_stuff_documents_chain(llm, question_answering_prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.messages import HumanMessage\n",
    "\n",
    "document_chain.invoke(\n",
    "    {\n",
    "        \"context\": docs,\n",
    "        \"messages\": [\n",
    "            HumanMessage(content=\"Can LangSmith help test my LLM applications?\")\n",
    "        ]\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# context 为空的情况\n",
    "\n",
    "document_chain.invoke(\n",
    "    {\n",
    "        \"context\": [],\n",
    "        \"messages\": [\n",
    "            HumanMessage(content=\"Can LangSmith help test my LLM applications?\")\n",
    "        ]\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Retrieval chains"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 整合 document_chain\n",
    "\n",
    "from typing import Dict\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "\n",
    "def parse_retriever_input(params: Dict):\n",
    "    return params[\"messages\"][-1].content\n",
    "\n",
    "retrieval_chain = RunnablePassthrough.assign(\n",
    "    context=parse_retriever_input | retriever\n",
    ").assign(\n",
    "    answer=document_chain\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 利用整合后的检索链 ，查询信息\n",
    "\n",
    "retrieval_chain.invoke(\n",
    "    {\n",
    "        \"messages\": [\n",
    "            HumanMessage(content=\"Can LangSmith help test my LLM applications?\")\n",
    "        ]\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Query transformation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
